/* File: formulary_exclusion_test.do
 * Author: Yunjuan Liu (small edits by Luca Maini)
 * Purpose: Tests the hypothesis that some products may be jointly excluded  
 *          detailed tests across channels and PBMs
 *          geruso regressions at formulary level
 *          geruso regressions across channels and pbmrelationship
 *          exclude "excluded drugs" and single-drug firms 
 * Last Updated: 06/09/2021
 *
 */

/////////////////////////////////////////////////////////////
/////													/////
/////	STEP 0. Get some list of drugs and companies	/////
/////													/////
/////////////////////////////////////////////////////////////

** Keep only first quarter and necessary variables
use "${maindir}/combined_regression_dataset_augmented.dta",clear

rename Product SSR_product

keep SSR_product mktCompany year

tempfile drug_companies
save "`drug_companies'", replace


/////////////////////////////////////////////////////////////
/////													/////
/////	STEP 1. Prepare formulary dataset for analysis	/////
/////													/////
/////////////////////////////////////////////////////////////

*** Merge formulary dataset with company information
use "${outdir}\allDrugs_yearly.dta", clear

* keep only commercial channel for this test
keep if channel == "COMMERCIAL":channel | ///
		channel == "HEALTH EXCHANGE":channel

* collapse to the formulary-year-drug level (keep channel info as well)
collapse (sum) lives, by(formularyid drugname channel year geruso) fast
drop if lives == 0

* cosmetic edits for merge
rename drugname MMIT_product
merge m:1 MMIT_product using "${inputdir}\MMIT_SSR_crosswalk.dta", nogen keep(match)
drop crosswalk_flag

* drop multiple MMIT names matched to the same SSR name, keep the highest geruso 
* coverage
collapse (max) geruso lives, by(SSR_product year formularyid channel) fast

* merge with company drug info
merge m:1 SSR_product year using "`drug_companies'", keep(match) nogen

* exclude companies with a single drug
bysort mktCompany year SSR_product : gen ind = _n == 1
bysort mktCompany year : egen tot_company_drugs = total(ind)
drop ind

keep if tot_company_drugs > 1

compress

* merge with ATC-3 info
tempfile main
save `main', replace

use "${inputdir}\ssr_product_atc4_codes.dta", clear
drop atc4
duplicates drop
rename Product SSR_product

joinby SSR_product using `main'
 


/////////////////////////////////////////
/////								/////
/////	STEP 2. Run the analysis	/////
/////								/////
/////////////////////////////////////////

*** Run regression and make the histogram

* Create an identifier for formulary-channel (sometimes the same formulary ID 
* is used in different channels)
egen xid = group(formularyid channel), missing

* Run regression
gen excluded = (geruso <= 1)

* Calculate counts of exclusions WITHIN class, and CROSS-MARKET
bysort mktCompany year xid SSR_product (atc3) : gen prod_ind = _n == 1

* count of excluded drug within ATC-3
bysort mktCompany year xid atc3 : egen num_drug_exclusions_atc3 = total(excluded)

* count of excluded drug in overall formulary (count each drug only once)
bysort mktCompany year xid : egen num_drug_exclusions = total(excluded * prod_ind)

* indicators for other exclusions in class and across classes
gen other_exclusions_atc3 = (num_drug_exclusions_atc3 - excluded > 0)
gen other_exclusions_x_market = (num_drug_exclusions - num_drug_exclusions_atc3 > 0)

* overall exclusions
gen other_exclusions_any = (num_drug_exclusions - excluded > 0)

* collapse to go back to drug-formulary-year level (take the max of each indicator)
collapse (max) excluded other_exclusions*, by(SSR_product mktCompany xid year) fast

* Indicators for fixed effects
egen prodID = group(SSR_product)
egen form_year = group(xid year)
egen prod_year = group(SSR_product year)

eststo: reghdfe excluded other_exclusions_any, absorb(prod_year form_year) cluster(prod_year)
eststo: reghdfe excluded other_exclusions_atc3, absorb(prod_year form_year) cluster(prod_year)
eststo: reghdfe excluded other_exclusions_x_market, absorb(prod_year form_year) cluster(prod_year)
eststo: reghdfe excluded other_exclusions_atc3 other_exclusions_x_market, absorb(prod_year form_year) cluster(prod_year)


/*
eststo: reghdfe excluded other_exclusions, absorb(year prodID xid) cluster(prod_year)
eststo: reghdfe excluded other_exclusions, absorb(prodID form_year) cluster(prod_year)
eststo: reghdfe excluded other_exclusions, absorb(prod_year form_year) cluster(prod_year)
*/

esttab using "${paperdir}\Tables\Table 3.csv", ///
		con ar2 se label replace nostar keep(other_exclusions*)
eststo clear

/*
bysort xid year mktCompany : gen tot_company_drugs_form = _N
gen frac_excluded_other = (num_drug_exclusions - excluded)/(tot_company_drugs_form - 1)

eststo: reghdfe excluded frac_excluded_other, absorb(year prodID xid) cluster(prod_year)
eststo: reghdfe excluded frac_excluded_other, absorb(year prodID form_year) cluster(prod_year)
eststo: reghdfe excluded frac_excluded_other, absorb(prod_year form_year) cluster(prod_year)

esttab using "${paperdir}\Tables\Table 3B.csv", ///
		con ar2 se label replace nostar keep(frac_excluded_other)
eststo clear	
*/

********* Histogram of coverage level by formulary and manufacturer ************

* first, calculate coverage of each formulary
bysort xid year : gen tot_form_drugs = _N
bysort xid year : egen tot_form_excluded = total(excluded)
gen frac_excluded = tot_form_excluded/tot_form_drugs

* now count fully excluded companies
bysort xid mktCompany : egen fully_excluded = min(excluded)
bysort xid year : egen tot_excluded_from_full_excl = total(fully_excluded)
gen frac_excluded_from_full_excl = tot_excluded_from_full_excl/tot_form_drugs

* how many of the non-covered drugs belong to a company that has a 
* covered drug in the same formulary
bysort xid mktCompany : egen has_covered_drug = max(excluded == 0)
gen excluded_from_covered_firm = excluded == 1 & has_covered_drug == 1
bysort xid year : egen tot_excluded_from_covered = total(excluded_from_covered_firm)

gen frac_excluded_from_covered = tot_excluded_from_covered/tot_form_drugs

* Collapse at the year level for graph
collapse (mean) frac_excluded frac_excluded_from_covered frac_excluded_from_full_excl, by(year) fast


* graph
graph set window fontface "${paperFont}"

twoway bar frac_excluded_from_full_excl frac_excluded year, ///
	xlabel(2011(1)2019, labsize(medsmall)) xtitle("") ///
	ylabel(0 "0%" 0.05 "5%" 0.1 "10%" 0.15 "15%" 0.2 "20%" 0.25 "25%", labsize(medsmall) nogrid) ///
	ytitle("{bf:Fraction of excluded drugs}") ///
	color(gs0 none) lcolor(none g) lwidth(none medium) ///
	barwidth(0.8 0.8) ///
	legend(order(2 "All exclusions" ///
				 1 "Exclusions of full company portfolios") cols(1) ///
		   ring(0) bplacement(11) size(medsmall) region(color(none)))
graph export "${paperdir}\Figures\Figure 9.pdf", as(pdf) replace
graph export "${paperdir}\Figures\Figure 9.eps", as(eps) replace


*** END CODE
